########################################################
## This code carries out parameter tuning 
## for the submodels with more employment history variables
########################################################

########################################################
## Preparation of the workspace
########################################################

## remove all objects from the current workspace
## NOTE(review): ls() with its defaults lists only the visible (non-dot) objects of the
## calling environment, so loaded packages and options are not reset by this line.
## Running the script in a fresh R session is the more reliable way to start clean.
rm(list=ls())

## load the required packages
library(haven)
library(caret)
library(randomForest)
library(doParallel)
library(mice)
library(plyr)
library(dplyr)
library(VIM)
library(base)
library(ranger)
library(glmnet)
library(xgboost)
library(ggplot2)

## Record when the run started, so the total run time can be worked out later
start_time <- Sys.time()

## Project root, and the sub-folders holding the data and the R programs
main <- "placeholder_main"

dataDirectory <- file.path(main, "Data")
RDirectory <- file.path(main, "Programs", "Output Generation")

## Bring in the tuning and training helper scripts
## (presumably these define tuning() and estimating() used in the loop below)
source(file.path(RDirectory, "102_0_caret_parameter_tuning_Function.R"))
source(file.path(RDirectory, "103_0_caret_predictions_Function.R"))


########################################################
## Define the locals for the loop
########################################################
 
# Outcome variable(s) to be predicted
dependent_variable <- "emplAft6M_0M_In"

# Sub-models to run. Every name shares the "BasicWithEmplHist_" prefix; the
# suffixes cover
#   - the cumulative variable groups (employment history added year by year),
#   - the marginal variable groups,
#   - the individual variables, added sequentially ("Seq") and marginally ("Marg").
# The names are assembled inside local() so that the helper vectors do not
# remain in the workspace.
model_list <- local({
  history_years <- paste0("Yminus", 1:5)
  single_vars <- c(
    "DaysUnemp_2Years", "unemplSpells2Ybefore", "nEmployers2Y",
    "DaysOnDI_2Years", "tenure", "L_nEmployees_L1L2",
    "L_firmSizeChange_L1L2", "L_layoffRate_L1L2", "missings"
  )

  suffixes <- c(
    # Variable groups: cumulative, then marginal
    "Basic", history_years, "All",
    paste0("Marg_", c(history_years, "PreUnemp")),
    # Individual variables: sequential (plus the full sequential model), then marginal
    paste0("IndivVars_Seq_", single_vars), "IndivVars_Seq",
    paste0("IndivVars_Marg_", single_vars)
  )

  paste0("BasicWithEmplHist_", suffixes)
})

# Years to loop over; at present only 2006 is processed.
# NOTE(review): the earlier comment on this line was cut off mid-sentence
# ("In principle we only want") - confirm the intended set of years.
years <- 2006

########################################################
## Running the loop
########################################################   

# Outcome on which the hyper-parameters are tuned. Any other outcome listed in
# dependent_variable reuses the parameters tuned on this one, so it has to come
# first in dependent_variable.
tuning_outcome <- "emplAft6M_0M_In"

# Seed passed to both tuning() and estimating()
run_seed <- 2111

# Build "<dataDirectory>/<prefix>_<tag><extension>" for an output file.
output_path <- function(prefix, tag, extension) {
  file.path(dataDirectory, paste0(prefix, "_", tag, extension))
}

# For every sub-model, year and outcome:
#   1. load the sub-model's data set (once per sub-model and year),
#   2. tune the hyper-parameters on the tuning outcome and write the final and
#      searched grids to CSV,
#   3. run estimating() and save the fitted models (.rda) and the predictions (.csv).
for (model in model_list) {

  for (y in years) {

    # Load the dataset once per (model, year), so that every outcome is
    # estimated on this sub-model's data regardless of the order of outcomes.
    data <- read_dta(file.path(dataDirectory,
                               paste0("002_DataForR_", model, "_", y, ".dta")))

    # Reset, so that parameters tuned for a previous sub-model or year can
    # never be reused silently.
    parameters <- NULL

    for (dependent in dependent_variable) {

      # Common "<model>_<outcome>_<year>" part of all output file names
      file_tag <- paste0(model, "_", dependent, "_", y)

      if (dependent == tuning_outcome) {

        # Run the tuning function:
        parameters <- tuning(data = data, dependent = dependent,
                             s_tuning = 0.1, seed = run_seed, noisily = TRUE)

        # Chosen parameter grids
        write.csv(parameters$rfgrid_final,
                  file = output_path("102_rfgrid", file_tag, ".csv"), row.names = FALSE)
        write.csv(parameters$boostgrid_final,
                  file = output_path("102_boostgrid", file_tag, ".csv"), row.names = FALSE)
        write.csv(parameters$lassogrid_final,
                  file = output_path("102_lassogrid", file_tag, ".csv"), row.names = FALSE)

        # Grids that were searched
        write.csv(parameters$rfgrid_search,
                  file = output_path("102_rfgrid_search", file_tag, ".csv"), row.names = FALSE)
        write.csv(parameters$boostgrid_search,
                  file = output_path("102_boostgrid_search", file_tag, ".csv"), row.names = FALSE)
        write.csv(parameters$lassogrid_search,
                  file = output_path("102_lassogrid_search", file_tag, ".csv"), row.names = FALSE)

        # Run the prediction function; s_tuning matches the value used for tuning
        results <- estimating(data = data, dependent = dependent,
                              parameters = parameters,
                              s_tuning = 0.1, s_training = 0.3,
                              seed = run_seed, noisily = TRUE)

      } else {

        # Other outcomes reuse the parameters tuned above for this model/year.
        if (is.null(parameters)) {
          stop("No tuned parameters available for model '", model, "', year ", y,
               ": '", tuning_outcome, "' must come before '", dependent,
               "' in dependent_variable.", call. = FALSE)
        }

        # No tuning is run for these outcomes, and estimating() is called with
        # s_tuning = 0 and s_training = 0.4.
        results <- estimating(data = data, dependent = dependent,
                              parameters = parameters,
                              s_tuning = 0, s_training = 0.4,
                              seed = run_seed, noisily = TRUE)
      }

      # Save models (the object must be called `models`, because save() stores
      # it under that name)
      models <- results$models
      save(models, file = output_path("103_Models", file_tag, ".rda"))

      # Save predictions
      write.csv(results$output,
                file = output_path("103_predictionsR", file_tag, ".csv"))

    }
  }
}
